import requests
import re
import time
import random
import csv


class DyttSpider:
    """Scrape (detail-link, title) pairs from dytt8.net movie listing pages into a CSV file."""

    def __init__(self):
        # Listing URL template; the page number is interpolated via .format().
        self.url = 'https://www.dytt8.net/html/gndy/dyzz/list_23_{}.html'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'}
        # Open the output file and initialize the csv writer.
        # Fixes: filename typo ('.cvs' -> '.csv'); newline='' so the csv module
        # does not produce blank rows on Windows; explicit utf-8 encoding.
        self.f = open('dytt.csv', 'w', newline='', encoding='utf-8')
        self.writer = csv.writer(self.f)

    def get_html(self, url):
        """Fetch one listing page, extract (link, title) pairs, and write each as a CSV row."""
        # The site serves gb2312-encoded pages; 'ignore' drops undecodable bytes.
        # A timeout prevents one hung connection from stalling the whole crawl.
        html = requests.get(url=url, headers=self.headers, timeout=10).content.decode('gb2312', 'ignore')

        # Each match is (detail_url, title); re.S lets .*? span line breaks.
        regex = r'<td height="26">.*?<a href="(.*?)" class="ulink">(.*?)</a>'
        r_list = re.findall(regex, html, re.S)
        for r in r_list:
            # Write the scraped pair to the csv file.
            self.writer.writerow(r)
            print(r)

    def crawl(self):
        """Crawl listing pages 1-100 with a randomized delay, then close the output file."""
        try:
            for page in range(1, 101):
                page_url = self.url.format(page)
                self.get_html(url=page_url)
                # Throttle request rate to be polite to the server.
                time.sleep(random.randint(1, 2))
        finally:
            # Close the file even if a request fails mid-crawl (original
            # leaked the handle on any exception).
            self.f.close()

if __name__ == '__main__':
    # Script entry point: build the spider and run the full crawl.
    DyttSpider().crawl()
